*-------------------------------------------------------------------------------
* Objective: Create Data on Manufacture and Services share in GDP Data
*-------------------------------------------------------------------------------

********************************************************************************
**# Step 1.1: Load data on Manufacturing, Value Added (% of GDP)
/*******************************************************************************
Source: World Bank WDI
Notes: We take 0s as missing values
       Data accessed 01.17.2020
*******************************************************************************/

* Importing Data
clear all
import excel "raw_datasets/manu_sh_gdp.xlsx", sheet("Data") firstrow clear

* Eliminating 2020
drop y2020

* Creating missings
quietly{
	foreach var of varlist y1960-y2019{
	replace `var' = "" if `var' == ".." | `var' == "0"
	}
	destring y*, replace
}

* Rename for consistency
rename CountryCode code_wb

* Generate 2020 and dropping 2019
rename y2019 y2020

* Reshaping
reshape long y, i(code_wb) j(year)

* Rename for consistency
rename y manu_sh_gdp

* Keep only desired countries
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3
drop _merge country_wb CountryName SeriesName SeriesCode 

* Saving
save "processed_datasets/dataset_manuserv", replace

********************************************************************************
**# Step 1.2: Services, Value Added (% of GDP)
/*******************************************************************************
Source: World Bank WDI
Notes: We take 0s as missing values
       Data accessed 01.17.2020
*******************************************************************************/

* Importing Data
clear all
import excel "raw_datasets/serv_sh_gdp.xlsx", sheet("Data") firstrow clear

* Eliminating 2020
drop y2020

* Creating missings
quietly{
	foreach var of varlist y1960-y2019{
	replace `var' = "" if `var' == ".." | `var' == "0"
	}
	destring y*, replace
}

* Rename for consistency
rename CountryCode code_wb

* Generate 2020 and dropping 2019
rename y2019 y2020

* Reshaping
reshape long y, i(code_wb) j(year)

* Rename for consistency
rename y serv_sh_gdp

* Merging with main dataset
drop CountryName SeriesName SeriesCode 
tempfile serv_sh_gdp_data
save `serv_sh_gdp_data', replace
use "processed_datasets/dataset_manuserv", clear
merge 1:1 code_wb year using `serv_sh_gdp_data'
keep if _merge == 3
drop _merge
save "processed_datasets/dataset_manuserv", replace

********************************************************************************
**# Step 2.1: Missing Data - Approximate 2020 with 2016/17/18
********************************************************************************

* Manufacturing
sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if manu_sh_gdp != . & manu_sh_gdp[60] == . & year == 2018
by code_wb: replace manu_sh_gdp = manu_sh_gdp[59] if manu_sh_gdp == . & year == 2020
by code_wb: replace identi = 1 if manu_sh_gdp != . & manu_sh_gdp[60] == . & year == 2017
by code_wb: replace manu_sh_gdp = manu_sh_gdp[58] if manu_sh_gdp == . & year == 2020
by code_wb: replace identi = 1 if manu_sh_gdp != . & manu_sh_gdp[60] == . & year == 2016
by code_wb: replace manu_sh_gdp = manu_sh_gdp[57] if manu_sh_gdp == . & year == 2020
replace manu_sh_gdp = . if identi == 1
drop identi

* Services
sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if serv_sh_gdp != . & serv_sh_gdp[60] == . & year == 2018
by code_wb: replace serv_sh_gdp = serv_sh_gdp[59] if serv_sh_gdp == . & year == 2020
by code_wb: replace identi = 1 if serv_sh_gdp != . & serv_sh_gdp[60] == . & year == 2017
by code_wb: replace serv_sh_gdp = serv_sh_gdp[58] if serv_sh_gdp == . & year == 2020
by code_wb: replace identi = 1 if serv_sh_gdp != . & serv_sh_gdp[60] == . & year == 2016
by code_wb: replace serv_sh_gdp = serv_sh_gdp[57] if serv_sh_gdp == . & year == 2020
replace serv_sh_gdp = . if identi == 1
drop identi

********************************************************************************
**# Step 2.2: Missing Data - Apply interpolation
********************************************************************************

by code_wb: ipolate manu_sh_gdp year, gen(int_manu_sh_gdp)
replace manu_sh_gdp = int_manu_sh_gdp if manu_sh_gdp == .

by code_wb: ipolate serv_sh_gdp year, gen(int_serv_sh_gdp)
replace serv_sh_gdp = int_serv_sh_gdp if serv_sh_gdp == .

drop int_manu_sh_gdp int_serv_sh_gdp

save "processed_datasets/dataset_manuserv", replace

********************************************************************************
**# Step 2.2: Missing Data - Import UN Data
/*******************************************************************************
We import UN Data on GDP to complement WDI data. In particular we use the var.
'Gross Value Added by Kind of Economic Activity at current prices - US dollars'
We used the measure without construction as it tracks WDI data better
*******************************************************************************/

import delimited "raw_datasets/gdp_un.csv", clear 

* Merging may leave some countries out because of mistmatches, but it includes all the ones we need
rename countryorarea country_wup
replace country_wup = "Swaziland" if country_wup == "Kingdom of Eswatini"
replace country_wup = "Côte d'Ivoire" if country_wup == "CÃ´te d'Ivoire"
replace country_wup = "Iran (Islamic Republic of)" if country_wup == "Iran, Islamic Republic of"

* Rename variables
rename agriculturehuntingforestryfishin agri_fish
rename miningmanufacturingutilitiesisic min_man_uti
rename manufacturingisicd manu
rename constructionisicf construction
rename wholesaleretailtraderestaurantsa wholesale
rename transportstorageandcommunication transport
rename otheractivitiesisicjp other_act
rename totalvalueadded total_va

* Keep countries in our sample
merge m:1 country_wup using "processed_datasets/ccode", keepusing(code_wb)
keep if _merge == 3
drop _merge

* Calculate shares
gen un_manu_sh_gdp = (manu/total_va)*100
gen un_serv_sh_gdp_v1 = ((construction+wholesale+transport+other_act)/total_va)*100
gen un_serv_sh_gdp_v2 = ((wholesale+transport+other_act)/total_va)*100
gen un_serv_sh_gdp_v3 = ((wholesale+transport)/total_va)*100

tempfile gdp_un_data
save `gdp_un_data', replace

use "processed_datasets/dataset_manuserv", clear
merge 1:1 code_wb year using `gdp_un_data', keepusing(un_manu_sh_gdp un_serv_sh_gdp_v1 un_serv_sh_gdp_v2 un_serv_sh_gdp_v3)
drop _merge

********************************************************************************
**# Step 2.3: Missing Data - Case-by-case
/*******************************************************************************
We use UN data to fill missings from WDI in a case by case basis or in cases
where WDI data is very different compared to GJV2016 and other sources
*******************************************************************************/

sort code_wb year

* Taiwan - S: Gross Domestic Product by Kind of Activity - National Statistics of Taiwan
* Notes: Services does not include construction

replace manu_sh_gdp = 32 if code_wb == "TWN" & year == 2020
replace manu_sh_gdp = 33.1 if code_wb == "TWN" & year == 2018
replace manu_sh_gdp = 33.5 if code_wb == "TWN" & year == 2017
replace manu_sh_gdp = 33.2 if code_wb == "TWN" & year == 2016
replace manu_sh_gdp = 31.3 if code_wb == "TWN" & year == 2015
replace manu_sh_gdp = 31.9 if code_wb == "TWN" & year == 2014
replace manu_sh_gdp = 30 if code_wb == "TWN" & year == 2013

replace serv_sh_gdp = 61.6 if code_wb == "TWN" & year == 2020
replace serv_sh_gdp = 60.9 if code_wb == "TWN" & year == 2018
replace serv_sh_gdp = 60.2 if code_wb == "TWN" & year == 2017
replace serv_sh_gdp = 60.1 if code_wb == "TWN" & year == 2016
replace serv_sh_gdp = 60.8 if code_wb == "TWN" & year == 2015
replace serv_sh_gdp = 61.4 if code_wb == "TWN" & year == 2014
replace serv_sh_gdp = 63.5 if code_wb == "TWN" & year == 2013

* Comoros - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "COM" & year > 2012
replace manu_sh_gdp = 8.5797157 if code_wb == "COM" & year == 2020
replace manu_sh_gdp = . if code_wb == "COM" & year == 2018

* Eritrea - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "ERI" & year > 2012
replace manu_sh_gdp = 6.025599 if code_wb == "ERI" & year == 2020
replace manu_sh_gdp = . if code_wb == "ERI" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "ERI" & year > 2012
replace serv_sh_gdp = 59.22974 if code_wb == "ERI" & year == 2020
replace serv_sh_gdp = . if code_wb == "ERI" & year == 2018

* Libya - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "LBY" & year > 2012
replace manu_sh_gdp = 3.654146 if code_wb == "LBY" & year == 2020
replace manu_sh_gdp = . if code_wb == "LBY" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "LBY" & year > 2012
replace serv_sh_gdp = 31.99457 if code_wb == "LBY" & year == 2020
replace serv_sh_gdp = . if code_wb == "LBY" & year == 2018

* Madagascar - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "MDG" & year > 2012
replace manu_sh_gdp = 10.6809 if code_wb == "MDG" & year == 2020
replace manu_sh_gdp = . if code_wb == "MDG" & year == 2018

* Solomon Islands - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "SLB" & year > 2012
replace manu_sh_gdp = 9.281924 if code_wb == "SLB" & year == 2020
replace manu_sh_gdp = . if code_wb == "SLB" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "SLB" & year > 2012
replace serv_sh_gdp = 59.35611 if code_wb == "SLB" & year == 2020
replace serv_sh_gdp = . if code_wb == "SLB" & year == 2018

* Somalia - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "SOM" & year > 2012
replace manu_sh_gdp = 2.484785 if code_wb == "SOM" & year == 2020
replace manu_sh_gdp = . if code_wb == "SOM" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "SOM" & year > 2012
replace serv_sh_gdp = 32.46075 if code_wb == "SOM" & year == 2020
replace serv_sh_gdp = . if code_wb == "SOM" & year == 2018

* Syria - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "SYR" & year > 2012
replace manu_sh_gdp = 4.681261 if code_wb == "SYR" & year == 2020
replace manu_sh_gdp = . if code_wb == "SYR" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "SYR" & year > 2012
replace serv_sh_gdp = 49.278 if code_wb == "SYR" & year == 2020
replace serv_sh_gdp = . if code_wb == "SYR" & year == 2018

* Venezuela - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "VEN" & year > 2012
replace manu_sh_gdp = 15.83781 if code_wb == "VEN" & year == 2020
replace manu_sh_gdp = . if code_wb == "VEN" & year == 2018

replace serv_sh_gdp = un_serv_sh_gdp_v2 if code_wb == "VEN" & year > 2012
replace serv_sh_gdp = 58.72645 if code_wb == "VEN" & year == 2020
replace serv_sh_gdp = . if code_wb == "VEN" & year == 2018

* Yemen - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = un_manu_sh_gdp if code_wb == "YEM" & year > 2012
replace manu_sh_gdp = 10.91051 if code_wb == "YEM" & year == 2020
replace manu_sh_gdp = . if code_wb == "YEM" & year == 2018

* Sudan - S: UN DATA
* Notes: We use 2018 as 2020
* Notes: We combine South Sudan and Sudan to create a single country

replace manu_sh_gdp = 6.6 if code_wb == "SDN" & year == 2020
replace manu_sh_gdp = . if code_wb == "SDN" & year == 2018
replace manu_sh_gdp = 10.1 if code_wb == "SDN" & year == 2017
replace manu_sh_gdp = 7.4 if code_wb == "SDN" & year == 2016
replace manu_sh_gdp = 6.1 if code_wb == "SDN" & year == 2015
replace manu_sh_gdp = 6.0 if code_wb == "SDN" & year == 2014
replace manu_sh_gdp = 7.1 if code_wb == "SDN" & year == 2013

replace serv_sh_gdp = 58.5 if code_wb == "SDN" & year == 2020
replace serv_sh_gdp = . if code_wb == "SDN" & year == 2018
replace serv_sh_gdp = 56 if code_wb == "SDN" & year == 2017
replace serv_sh_gdp = 59.3 if code_wb == "SDN" & year == 2016
replace serv_sh_gdp = 53.7 if code_wb == "SDN" & year == 2015
replace serv_sh_gdp = 50.5 if code_wb == "SDN" & year == 2014
replace serv_sh_gdp = 48.5 if code_wb == "SDN" & year == 2013

drop if code_wb == "SSD"

* ------------------------------------------------------------------------------
* Corrections 

* Algeria - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = 4.4 if code_wb == "DZA" & year == 2020
replace manu_sh_gdp = . if code_wb == "DZA" & year == 2018
replace manu_sh_gdp = 4.6 if code_wb == "DZA" & year == 2017
replace manu_sh_gdp = 4.6 if code_wb == "DZA" & year == 2016
replace manu_sh_gdp = 4.6 if code_wb == "DZA" & year == 2015
replace manu_sh_gdp = 4.0 if code_wb == "DZA" & year == 2014
replace manu_sh_gdp = 3.8 if code_wb == "DZA" & year == 2013

replace serv_sh_gdp = 46.6 if code_wb == "DZA" & year == 2020
replace serv_sh_gdp = . if code_wb == "DZA" & year == 2018
replace serv_sh_gdp = 48.7 if code_wb == "DZA" & year == 2017
replace serv_sh_gdp = 51.2 if code_wb == "DZA" & year == 2016
replace serv_sh_gdp = 50.6 if code_wb == "DZA" & year == 2015
replace serv_sh_gdp = 45.5 if code_wb == "DZA" & year == 2014
replace serv_sh_gdp = 43.8 if code_wb == "DZA" & year == 2013

* Gabon - S: UN DATA
* Notes: We use 2018 as 2020

replace manu_sh_gdp = 7.3 if code_wb == "GAB" & year == 2020
replace manu_sh_gdp = . if code_wb == "GAB" & year == 2018
replace manu_sh_gdp = 7.3 if code_wb == "GAB" & year == 2017
replace manu_sh_gdp = 7.8 if code_wb == "GAB" & year == 2016
replace manu_sh_gdp = 6.7 if code_wb == "GAB" & year == 2015
replace manu_sh_gdp = 6.3 if code_wb == "GAB" & year == 2014
replace manu_sh_gdp = 5.7 if code_wb == "GAB" & year == 2013

replace serv_sh_gdp = 45.3 if code_wb == "GAB" & year == 2020
replace serv_sh_gdp = . if code_wb == "GAB" & year == 2018
replace serv_sh_gdp = 46.4 if code_wb == "GAB" & year == 2017
replace serv_sh_gdp = 46.2 if code_wb == "GAB" & year == 2016
replace serv_sh_gdp = 43.4 if code_wb == "GAB" & year == 2015
replace serv_sh_gdp = 39.4 if code_wb == "GAB" & year == 2014
replace serv_sh_gdp = 35.7 if code_wb == "GAB" & year == 2013

* Jamaica - S: UN DATA
* Notes: We use 2018 as 2020

replace serv_sh_gdp = 68.8 if code_wb == "JAM" & year == 2020
replace serv_sh_gdp = . if code_wb == "JAM" & year == 2018
replace serv_sh_gdp = 69.6 if code_wb == "JAM" & year == 2017
replace serv_sh_gdp = 70.4 if code_wb == "JAM" & year == 2016
replace serv_sh_gdp = 70.8 if code_wb == "JAM" & year == 2015
replace serv_sh_gdp = 72.3 if code_wb == "JAM" & year == 2014
replace serv_sh_gdp = 72.8 if code_wb == "JAM" & year == 2013

* Nicaragua - S: UN DATA
* Notes: We use 2018 as 2020

replace serv_sh_gdp = 54.9 if code_wb == "NIC" & year == 2020
replace serv_sh_gdp = . if code_wb == "NIC" & year == 2018
replace serv_sh_gdp = 55.6 if code_wb == "NIC" & year == 2017
replace serv_sh_gdp = 54.9 if code_wb == "NIC" & year == 2016
replace serv_sh_gdp = 53.7 if code_wb == "NIC" & year == 2015
replace serv_sh_gdp = 53.7 if code_wb == "NIC" & year == 2014
replace serv_sh_gdp = 53.6 if code_wb == "NIC" & year == 2013

* Yemen - S: UN DATA
* Notes: We use 2018 as 2020

replace serv_sh_gdp = 60.2 if code_wb == "YEM" & year == 2020
replace serv_sh_gdp = . if code_wb == "YEM" & year == 2018
replace serv_sh_gdp = 60.7 if code_wb == "YEM" & year == 2017
replace serv_sh_gdp = 60.6 if code_wb == "YEM" & year == 2016
replace serv_sh_gdp = 61.7 if code_wb == "YEM" & year == 2015
replace serv_sh_gdp = 50.0 if code_wb == "YEM" & year == 2014
replace serv_sh_gdp = 48.6 if code_wb == "YEM" & year == 2013

* Equatorial Guinea - S: Staticts Deparment from Equatorial Guinea
* Notes: We use 2018 as 2020 and 2014 as 2013

replace manu_sh_gdp = 25.4 if code_wb == "GNQ" & year == 2020
replace manu_sh_gdp = . if code_wb == "GNQ" & year == 2018
replace manu_sh_gdp = 25.4 if code_wb == "GNQ" & year == 2017
replace manu_sh_gdp = 18.5 if code_wb == "GNQ" & year == 2016
replace manu_sh_gdp = 17.8 if code_wb == "GNQ" & year == 2015
replace manu_sh_gdp = . if code_wb == "GNQ" & year == 2014
replace manu_sh_gdp = 20.1 if code_wb == "GNQ" & year == 2013

replace serv_sh_gdp = 39.3 if code_wb == "GNQ" & year == 2020
replace serv_sh_gdp = . if code_wb == "GNQ" & year == 2018
replace serv_sh_gdp = 40 if code_wb == "GNQ" & year == 2017
replace serv_sh_gdp = 43.9 if code_wb == "GNQ" & year == 2016
replace serv_sh_gdp = 37.8 if code_wb == "GNQ" & year == 2015
replace serv_sh_gdp = . if code_wb == "GNQ" & year == 2014
replace serv_sh_gdp = 27.9 if code_wb == "GNQ" & year == 2013

*-------------------------------------------------------------------------------
* Replacing missings with interpolation

sort code_wb year
by code_wb: ipolate manu_sh_gdp year, gen(int_manu_sh_gdp)
replace manu_sh_gdp = int_manu_sh_gdp if manu_sh_gdp == .
by code_wb: ipolate serv_sh_gdp year, gen(int_serv_sh_gdp)
replace serv_sh_gdp = int_serv_sh_gdp if serv_sh_gdp == .
drop int_manu_sh_gdp int_serv_sh_gdp

********************************************************************************
**# Step 3: Finalize Data - Calculate moving averages
*******************************************************************************/

* Sorting
sort code_wb year
foreach movav in manu_sh_gdp serv_sh_gdp{
	
	* Moving Average (-1/+1)
	
	* Generating Variable ma1
	gen `movav'_ma1 = 0
	
	* Estimating ma1 for the sample except 1960 and 2020
	foreach ma1 of numlist 2/59{
		by code_wb: replace `movav'_ma1 = (`movav'[`ma1'-1] + `movav'[`ma1'] + `movav'[`ma1'+1])/3 if _n == `ma1' 
}
	
	* Estimating ma1 for 1960 and 2020
	by code_wb: replace `movav'_ma1 = (`movav'[1] + `movav'[2])/2 if _n == 1
	by code_wb: replace `movav'_ma1 = (`movav'[59] + `movav'[60])/2 if _n == 60

	* Moving Average (-2/+2)

	* Generating Variable ma2
	gen `movav'_ma2 = 0

	foreach ma2 of numlist 3/58{
		by code_wb: replace `movav'_ma2 = ///
		(`movav'[`ma2'-2] + `movav'[`ma2'-1] + `movav'[`ma2'] + `movav'[`ma2'+1] + `movav'[`ma2'+2])/5 ///
		if _n == `ma2' 
}
	
	* Estimating ma2 for 1960, 1961, 2018 and 2020
	by code_wb: replace `movav'_ma2 = (`movav'[1] + `movav'[2] + `movav'[3])/3 if _n == 1
	by code_wb: replace `movav'_ma2 = (`movav'[1] + `movav'[2] + `movav'[3] + `movav'[4])/4 if _n == 2
	by code_wb: replace `movav'_ma2 = (`movav'[57] + `movav'[58] + `movav'[59] + `movav'[60])/4 if _n == 59
	by code_wb: replace `movav'_ma2 = (`movav'[58] + `movav'[59] + `movav'[60])/3 if _n == 60

}

********************************************************************************
**# Step 3: Finalize Data - Combine with GJV2016
*******************************************************************************/

keep if year == 2015 | year == 2020

* Mergin with GJV2016 data to generate values pre-2015

rename code_wb ccode
replace ccode = "ZAR" if ccode == "COD"
merge 1:1 ccode year using "raw_datasets/gjv", keepusing(mfg_gdp2010 mfgpanel serv_gdp2010 servpanel)
drop _merge
rename ccode code_wb
replace code_wb = "COD" if code_wb == "ZAR"

* Finalize Manufacturing variable by combining with GJV2016

gen manu_sh_gdp_c = mfgpanel
gen manu_sh_gdp_c_ma1 = mfgpanel
gen manu_sh_gdp_c_ma2 = mfgpanel

replace manu_sh_gdp_c = mfg_gdp2010 if year == 2010
replace manu_sh_gdp_c_ma1 = mfg_gdp2010 if year == 2010
replace manu_sh_gdp_c_ma2 = mfg_gdp2010 if year == 2010

replace manu_sh_gdp_c = manu_sh_gdp if year > 2011
replace manu_sh_gdp_c_ma1 = manu_sh_gdp_ma1 if year > 2011
replace manu_sh_gdp_c_ma2 = manu_sh_gdp_ma2 if year > 2011

* Finalize Services variable by combining with GJV2016

gen serv_sh_gdp_c = servpanel
gen serv_sh_gdp_c_ma1 = servpanel
gen serv_sh_gdp_c_ma2 = servpanel

replace serv_sh_gdp_c = serv_gdp2010 if year == 2010
replace serv_sh_gdp_c_ma1 = serv_gdp2010 if year == 2010
replace serv_sh_gdp_c_ma2 = serv_gdp2010 if year == 2010

replace serv_sh_gdp_c = serv_sh_gdp if year > 2011
replace serv_sh_gdp_c_ma1 = serv_sh_gdp_ma1 if year > 2011
replace serv_sh_gdp_c_ma2 = serv_sh_gdp_ma2 if year > 2011

save "processed_datasets/dataset_manuserv", replace
